#NumPy is a Python Library used to perform wide variety of Mathematical Operations on Arrays.
import numpy as np
#Pandas is a Python Library used to analyze big data and draw Conclusions based on Statistical Theories.
import pandas as pd
#matplotlib.pyplot contains functions that make matplotlib work like MATLAB;
#each pyplot function makes some change to the figure.
import matplotlib.pyplot as plt
#plotly.express contains functions that can create Entire Figures.
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot, plot
from plotly.subplots import make_subplots
import seaborn as sns
Importing the Dataset.
#Loading the Dataset.
#NOTE: the Path below is an absolute Windows Path; adjust it before running the Notebook on another Machine.
Data=pd.read_csv("C:\\Users\\Admin\\Desktop\\Madhu\\Anaconda-Jupyter\\Spotify 2000-2019 Jupyter Project\\Raw Data.csv")
#To get First 5 Rows of Dataset.
Data.head()
| artist | song | duration_ms | explicit | year | popularity | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | genre | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Britney Spears | Oops!...I Did It Again | 211160 | False | 2000 | 77 | 0.751 | 0.834 | 1 | -5.444 | 0 | 0.0437 | 0.3000 | 0.000018 | 0.3550 | 0.894 | 95.053 | pop |
| 1 | blink-182 | All The Small Things | 167066 | False | 1999 | 79 | 0.434 | 0.897 | 0 | -4.918 | 1 | 0.0488 | 0.0103 | 0.000000 | 0.6120 | 0.684 | 148.726 | rock, pop |
| 2 | Faith Hill | Breathe | 250546 | False | 1999 | 66 | 0.529 | 0.496 | 7 | -9.007 | 1 | 0.0290 | 0.1730 | 0.000000 | 0.2510 | 0.278 | 136.859 | pop, country |
| 3 | Bon Jovi | It's My Life | 224493 | False | 2000 | 78 | 0.551 | 0.913 | 0 | -4.063 | 0 | 0.0466 | 0.0263 | 0.000013 | 0.3470 | 0.544 | 119.992 | rock, metal |
| 4 | *NSYNC | Bye Bye Bye | 200560 | False | 2000 | 65 | 0.614 | 0.928 | 8 | -4.806 | 0 | 0.0516 | 0.0408 | 0.001040 | 0.0845 | 0.879 | 172.656 | pop |
#To get Last 5 Rows of Dataset (5 is the Default Number of Rows of tail()).
Data.tail()
| artist | song | duration_ms | explicit | year | popularity | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | genre | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1995 | Jonas Brothers | Sucker | 181026 | False | 2019 | 79 | 0.842 | 0.734 | 1 | -5.065 | 0 | 0.0588 | 0.0427 | 0.000000 | 0.1060 | 0.952 | 137.958 | pop |
| 1996 | Taylor Swift | Cruel Summer | 178426 | False | 2019 | 78 | 0.552 | 0.702 | 9 | -5.707 | 1 | 0.1570 | 0.1170 | 0.000021 | 0.1050 | 0.564 | 169.994 | pop |
| 1997 | Blanco Brown | The Git Up | 200593 | False | 2019 | 69 | 0.847 | 0.678 | 9 | -8.635 | 1 | 0.1090 | 0.0669 | 0.000000 | 0.2740 | 0.811 | 97.984 | hip hop, country |
| 1998 | Sam Smith | Dancing With A Stranger (with Normani) | 171029 | False | 2019 | 75 | 0.741 | 0.520 | 8 | -7.513 | 1 | 0.0656 | 0.4500 | 0.000002 | 0.2220 | 0.347 | 102.998 | pop |
| 1999 | Post Malone | Circles | 215280 | False | 2019 | 85 | 0.695 | 0.762 | 0 | -3.497 | 1 | 0.0395 | 0.1920 | 0.002440 | 0.0863 | 0.553 | 120.042 | hip hop |
#To get Random 5 Sample Rows from the Dataset.
#No random_state is passed, so a different Set of Rows is shown on every Run.
Data.sample(5)
| artist | song | duration_ms | explicit | year | popularity | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | genre | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1605 | Desiigner | Panda | 246761 | True | 2016 | 70 | 0.576 | 0.766 | 10 | -4.943 | 0 | 0.4490 | 0.02800 | 0.000002 | 0.3660 | 0.236 | 144.833 | hip hop, pop |
| 88 | Marc Anthony | You Sang To Me | 347106 | False | 1999 | 56 | 0.578 | 0.894 | 10 | -5.420 | 1 | 0.0296 | 0.01030 | 0.000003 | 0.2160 | 0.741 | 165.980 | pop, latin |
| 1018 | Edward Maya | Stereo Love - Radio Edit | 184573 | False | 2010 | 66 | 0.799 | 0.783 | 1 | -3.896 | 0 | 0.0322 | 0.03460 | 0.018600 | 0.0757 | 0.586 | 127.041 | pop |
| 215 | Linkin Park | In the End | 216880 | False | 2000 | 83 | 0.556 | 0.864 | 3 | -5.870 | 0 | 0.0584 | 0.00958 | 0.000000 | 0.2090 | 0.400 | 105.143 | rock, metal |
| 697 | Chris Brown | Yo (Excuse Me Miss) | 229040 | False | 2005 | 69 | 0.536 | 0.612 | 4 | -5.847 | 1 | 0.2720 | 0.11900 | 0.000000 | 0.2090 | 0.570 | 86.768 | hip hop, pop, R&B |
Checking the Dataset.
#To get the Column Names, Non-Null Counts, Data Types and Memory Usage of the Dataset.
Data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2000 entries, 0 to 1999 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 artist 2000 non-null object 1 song 2000 non-null object 2 duration_ms 2000 non-null int64 3 explicit 2000 non-null bool 4 year 2000 non-null int64 5 popularity 2000 non-null int64 6 danceability 2000 non-null float64 7 energy 2000 non-null float64 8 key 2000 non-null int64 9 loudness 2000 non-null float64 10 mode 2000 non-null int64 11 speechiness 2000 non-null float64 12 acousticness 2000 non-null float64 13 instrumentalness 2000 non-null float64 14 liveness 2000 non-null float64 15 valence 2000 non-null float64 16 tempo 2000 non-null float64 17 genre 2000 non-null object dtypes: bool(1), float64(9), int64(5), object(3) memory usage: 267.7+ KB
#To get the Shape of the Dataset as (Number of Rows, Number of Columns).
Data.shape
(2000, 18)
#To get the Summary Statistics (count, mean, std, min, Quartiles, max) of the Numeric Columns.
Data.describe()
| duration_ms | year | popularity | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2000.000000 | 2000.00000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 | 2000.000000 |
| mean | 228748.124500 | 2009.49400 | 59.872500 | 0.667438 | 0.720366 | 5.378000 | -5.512434 | 0.553500 | 0.103568 | 0.128955 | 0.015226 | 0.181216 | 0.551690 | 120.122558 |
| std | 39136.569008 | 5.85996 | 21.335577 | 0.140416 | 0.152745 | 3.615059 | 1.933482 | 0.497254 | 0.096159 | 0.173346 | 0.087771 | 0.140669 | 0.220864 | 26.967112 |
| min | 113000.000000 | 1998.00000 | 0.000000 | 0.129000 | 0.054900 | 0.000000 | -20.514000 | 0.000000 | 0.023200 | 0.000019 | 0.000000 | 0.021500 | 0.038100 | 60.019000 |
| 25% | 203580.000000 | 2004.00000 | 56.000000 | 0.581000 | 0.622000 | 2.000000 | -6.490250 | 0.000000 | 0.039600 | 0.014000 | 0.000000 | 0.088100 | 0.386750 | 98.985750 |
| 50% | 223279.500000 | 2010.00000 | 65.500000 | 0.676000 | 0.736000 | 6.000000 | -5.285000 | 1.000000 | 0.059850 | 0.055700 | 0.000000 | 0.124000 | 0.557500 | 120.021500 |
| 75% | 248133.000000 | 2015.00000 | 73.000000 | 0.764000 | 0.839000 | 8.000000 | -4.167750 | 1.000000 | 0.129000 | 0.176250 | 0.000068 | 0.241000 | 0.730000 | 134.265500 |
| max | 484146.000000 | 2020.00000 | 89.000000 | 0.975000 | 0.999000 | 11.000000 | -0.276000 | 1.000000 | 0.576000 | 0.976000 | 0.985000 | 0.853000 | 0.973000 | 210.851000 |
#To count the Null Values in each Column of the Dataset.
Data.isnull().sum()
artist 0 song 0 duration_ms 0 explicit 0 year 0 popularity 0 danceability 0 energy 0 key 0 loudness 0 mode 0 speechiness 0 acousticness 0 instrumentalness 0 liveness 0 valence 0 tempo 0 genre 0 dtype: int64
Analysing and Visualizing the Dataset.
- To know the Number of Artists and their Hit Songs in the Dataset.
#To count the Songs of each Artist, ordered from the most to the fewest Songs.
#The Length of the Result shows that there are 835 Artists in Total.
Artist=Data['artist'].value_counts()
Artist
artist
Rihanna 25
Drake 23
Eminem 21
Calvin Harris 20
Britney Spears 19
..
Sidney Samson 1
Cam’ron 1
Elvis Presley 1
Lucenzo 1
Blanco Brown 1
Name: count, Length: 835, dtype: int64
- To get the List of Artists based on their Popularity.
#Sum the Features of all the Songs of each Artist and rank the Artists by
#their Total Popularity (highest first). The Artist Name is the Index.
#The Text Columns ('artist','genre') are deliberately left out: summing
#Strings only concatenates them ("RihannaRihanna...") and carries no Information.
Popular_Artist=(
    Data.groupby('artist')[['explicit','danceability','popularity','loudness',
                            'energy','speechiness','instrumentalness',
                            'acousticness','liveness']]
    .sum()
    .sort_values('popularity',ascending=False)
)
Popular_Artist
| artist | explicit | danceability | popularity | loudness | energy | speechiness | instrumentalness | acousticness | liveness | genre | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| artist | |||||||||||
| Rihanna | RihannaRihannaRihannaRihannaRihannaRihannaRiha... | 8 | 16.482 | 1803 | -137.276 | 16.844 | 1.9983 | 0.153460 | 3.811065 | 3.7089 | hip hop, pop, R&Bhip hop, pop, R&Bhip hop, pop... |
| Eminem | EminemEminemEminemEminemEminemEminemEminemEmin... | 20 | 15.603 | 1519 | -83.568 | 16.374 | 4.6724 | 0.001437 | 2.212270 | 4.0052 | hip hophip hophip hophip hophip hophip hophip ... |
| Drake | DrakeDrakeDrakeDrakeDrakeDrakeDrakeDrakeDrakeD... | 18 | 16.859 | 1424 | -164.954 | 13.767 | 3.7478 | 0.100460 | 1.995427 | 3.8368 | hip hop, pop, R&Bhip hop, pop, R&Bhip hop, pop... |
| Calvin Harris | Calvin HarrisCalvin HarrisCalvin HarrisCalvin ... | 1 | 13.719 | 1356 | -77.276 | 17.429 | 1.1381 | 1.041232 | 1.559682 | 4.3994 | hip hop, pop, Dance/Electronichip hop, pop, Da... |
| Britney Spears | Britney SpearsBritney SpearsBritney SpearsBrit... | 1 | 13.457 | 1198 | -104.603 | 14.198 | 1.3786 | 0.028039 | 4.452690 | 2.8750 | poppoppoppoppoppoppoppoppoppoppoppoppoppoppopp... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Silentó | Silentó | 0 | 0.819 | 0 | -8.522 | 0.768 | 0.1340 | 0.000000 | 0.234000 | 0.3340 | hip hop |
| Storm Queen | Storm Queen | 0 | 0.832 | 0 | -8.035 | 0.815 | 0.0810 | 0.005530 | 0.003040 | 0.2630 | Dance/Electronic |
| AJR | AJR | 0 | 0.673 | 0 | -4.518 | 0.637 | 0.0429 | 0.000000 | 0.137000 | 0.1840 | rock |
| The Lumineers | The Lumineers | 0 | 0.664 | 0 | -6.429 | 0.576 | 0.0286 | 0.000198 | 0.630000 | 0.0902 | pop, Folk/Acoustic |
| Kungs | Kungs | 0 | 0.790 | 0 | -4.684 | 0.705 | 0.0383 | 0.000048 | 0.080700 | 0.2510 | pop, Dance/Electronic |
835 rows × 11 columns
3)(i) To get the Top 10 Popular Artists.
#First 10 Rows of Popular_Artist, i.e. the 10 Artists with the highest Total Popularity.
Popular_Artist.head(10)
| artist | explicit | danceability | popularity | loudness | energy | speechiness | instrumentalness | acousticness | liveness | genre | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| artist | |||||||||||
| Rihanna | RihannaRihannaRihannaRihannaRihannaRihannaRiha... | 8 | 16.482 | 1803 | -137.276 | 16.844 | 1.9983 | 0.153460 | 3.811065 | 3.7089 | hip hop, pop, R&Bhip hop, pop, R&Bhip hop, pop... |
| Eminem | EminemEminemEminemEminemEminemEminemEminemEmin... | 20 | 15.603 | 1519 | -83.568 | 16.374 | 4.6724 | 0.001437 | 2.212270 | 4.0052 | hip hophip hophip hophip hophip hophip hophip ... |
| Drake | DrakeDrakeDrakeDrakeDrakeDrakeDrakeDrakeDrakeD... | 18 | 16.859 | 1424 | -164.954 | 13.767 | 3.7478 | 0.100460 | 1.995427 | 3.8368 | hip hop, pop, R&Bhip hop, pop, R&Bhip hop, pop... |
| Calvin Harris | Calvin HarrisCalvin HarrisCalvin HarrisCalvin ... | 1 | 13.719 | 1356 | -77.276 | 17.429 | 1.1381 | 1.041232 | 1.559682 | 4.3994 | hip hop, pop, Dance/Electronichip hop, pop, Da... |
| Britney Spears | Britney SpearsBritney SpearsBritney SpearsBrit... | 1 | 13.457 | 1198 | -104.603 | 14.198 | 1.3786 | 0.028039 | 4.452690 | 2.8750 | poppoppoppoppoppoppoppoppoppoppoppoppoppoppopp... |
| David Guetta | David GuettaDavid GuettaDavid GuettaDavid Guet... | 2 | 11.089 | 1185 | -69.850 | 14.444 | 1.3803 | 0.176915 | 1.622460 | 4.0110 | hip hop, pop, Dance/Electronichip hop, pop, Da... |
| Taylor Swift | Taylor SwiftTaylor SwiftTaylor SwiftTaylor Swi... | 0 | 10.105 | 1125 | -94.796 | 11.222 | 1.2955 | 0.012130 | 1.158330 | 2.1732 | poppoppoppoppoppoppoppoppoppoppoppoppoppoppoppop |
| Chris Brown | Chris BrownChris BrownChris BrownChris BrownCh... | 6 | 11.652 | 1080 | -87.028 | 11.546 | 1.8561 | 0.004574 | 0.760747 | 2.3684 | hip hop, pop, R&Bhip hop, pop, R&Bhip hop, pop... |
| Kanye West | Kanye WestKanye WestKanye WestKanye WestKanye ... | 14 | 10.658 | 1049 | -105.484 | 12.002 | 3.7812 | 1.000080 | 1.765526 | 4.3074 | hip hophip hophip hophip hophip hophip hophip ... |
| Katy Perry | Katy PerryKaty PerryKaty PerryKaty PerryKaty P... | 0 | 10.233 | 1033 | -73.576 | 12.462 | 0.8995 | 0.000055 | 0.373918 | 3.4892 | poppoppoppoppoppoppoppoppoppoppoppoppoppoppoppop |
3)(ii) To Plot the Bar Graph.
#Take the first 10 Entries of the Song Counts per Artist.
Songs=Artist.head(10)
Name=Songs.index
#Draw one Red Bar per Artist on a wide Canvas.
fig=plt.figure(figsize=(15,5))
plt.bar(Name,Songs,color="red",width=0.4)
#Title and Axis Labels.
plt.title('Top 10 Artists with Hit Songs',fontsize=30,color='black')
plt.xlabel('Artists',fontsize=20)
plt.ylabel('Number of Songs',fontsize=20)
#Slant the Artist Names so that neighbouring Labels do not overlap.
plt.xticks(fontsize=13,rotation=45,ha='right')
plt.yticks(fontsize=15)
plt.show()
4)(i) To get the List of Artists with the Average Popularity of their Songs.
#Average Popularity of the Songs of each Artist (Artist Name is the Index).
Average_Popularity=Data[['artist','popularity']].groupby('artist').mean()
#Number of Songs of each Artist, ordered by Artist Name.
Artists=Data['artist'].value_counts().sort_index()
#Assigning the Series itself (not its raw Values) matches the Counts to the
#Rows by Artist Name, so the Result does not depend on both Tables being
#sorted in exactly the same Order.
Average_Popularity['total songs']=Artists
#Artists with the most Songs first; the Artist Name becomes a regular Column.
Average_Popularity=Average_Popularity.sort_values(by='total songs',ascending=False).reset_index()
Average_Popularity
| artist | popularity | total songs | |
|---|---|---|---|
| 0 | Rihanna | 72.120000 | 25 |
| 1 | Drake | 61.913043 | 23 |
| 2 | Eminem | 72.333333 | 21 |
| 3 | Calvin Harris | 67.800000 | 20 |
| 4 | Britney Spears | 63.052632 | 19 |
| ... | ... | ... | ... |
| 830 | Khia | 59.000000 | 1 |
| 831 | Kevin Rudolf | 66.000000 | 1 |
| 832 | Kevin Lyttle | 67.000000 | 1 |
| 833 | Kevin Gates | 69.000000 | 1 |
| 834 | Kungs | 0.000000 | 1 |
835 rows × 3 columns
4)(ii) To Plot the Scatter Graph of Top 20.
#Bubble Chart for the first 20 Rows of Average_Popularity:
#Bubble Size = Number of Songs, Bubble Colour = Average Popularity.
iplot(
    px.scatter(
        Average_Popularity.head(20),
        x='artist',
        y='popularity',
        color='popularity',
        size='total songs',
        size_max=40,
        labels={'artist':'Artist','popularity':'Popularity'},
        title='Top 20 Artists with Average Popularity of their Top Hits',
    )
)
4)(iii) To Plot the Scatter Graph of Top 10.
#Bubble Chart for the first 10 Rows of Average_Popularity:
#Bubble Size = Number of Songs, Bubble Colour = Average Popularity.
iplot(
    px.scatter(
        Average_Popularity.head(10),
        x='artist',
        y='popularity',
        color='popularity',
        size='total songs',
        size_max=40,
        labels={'artist':'Artist','popularity':'Popularity'},
        title='<b>Top 10 Artists with Average Popularity of their Top Hits</b>',
    )
)
5)(i) To get the List of all the Genres.
#To count the Songs of each Genre Label, ordered from the most to the fewest Songs.
#A Label is the full Text of the 'genre' Column, so "pop" and "hip hop, pop" are counted separately.
Genre=Data['genre'].value_counts()
Genre
genre pop 428 hip hop, pop 277 hip hop, pop, R&B 244 pop, Dance/Electronic 221 pop, R&B 178 hip hop 124 hip hop, pop, Dance/Electronic 78 rock 58 rock, pop 43 Dance/Electronic 41 rock, metal 38 pop, latin 28 pop, rock 26 set() 22 hip hop, Dance/Electronic 16 latin 15 pop, rock, metal 14 hip hop, pop, latin 14 R&B 13 pop, rock, Dance/Electronic 13 country 10 metal 9 hip hop, pop, rock 9 rock, pop, Dance/Electronic 8 pop, Folk/Acoustic 8 pop, country 8 pop, R&B, Dance/Electronic 6 rock, pop, metal 4 hip hop, pop, R&B, Dance/Electronic 3 hip hop, R&B 3 hip hop, pop, R&B, latin 3 World/Traditional, pop, Folk/Acoustic 2 pop, rock, Folk/Acoustic 2 pop, easy listening, jazz 2 World/Traditional, rock 2 rock, blues, latin 2 World/Traditional, rock, pop 2 hip hop, rock, pop 2 rock, blues 2 Folk/Acoustic, pop 2 World/Traditional, hip hop 2 rock, pop, metal, Dance/Electronic 1 rock, classical 1 country, latin 1 hip hop, latin, Dance/Electronic 1 rock, Folk/Acoustic, pop 1 easy listening 1 Folk/Acoustic, rock, pop 1 rock, easy listening 1 pop, R&B, easy listening 1 World/Traditional, pop 1 hip hop, pop, country 1 rock, Dance/Electronic 1 rock, R&B, Folk/Acoustic, pop 1 World/Traditional, Folk/Acoustic 1 pop, easy listening, Dance/Electronic 1 Folk/Acoustic, rock 1 rock, Folk/Acoustic, easy listening 1 hip hop, country 1 Name: count, dtype: int64
5)(ii) To Plot a Histogram.
#Count the Rows of each Genre (the 'song' Column then holds the Number of
#non-null Songs) and order the Genres from the most to the fewest Songs.
Fig=px.histogram(
    Data.groupby('genre',as_index=False).count().sort_values(by='song',ascending=False),
    x='genre',
    y='song',
    marginal='box',
    template='plotly_dark',
    color_discrete_sequence=['green'],
    labels={'sum of song':'Total No of Songs','genre':'Genres'},
    title='<b>Total no of Songs based on Genres</b>',
)
#Centre the Title above the Plot.
Fig.update_layout(title_x=0.5)
#Top 10 can also be plotted by making: .sort_values(by='song',ascending=False).head(10)
5)(iii) To Plot a Bar Graph of Top 10 Genres.
#Take the first 10 Entries of the Song Counts per Genre.
Number_of_Songs=Genre.head(10)
Genre_Name=Number_of_Songs.index
#Draw one Blue Bar per Genre.
fig=plt.figure(figsize=(10,4))
plt.bar(Genre_Name,Number_of_Songs,color='blue',width=0.5)
#Title and Axis Labels.
plt.title("Top 10 Genres that are Hit",fontsize=20,color='black')
plt.xlabel("Genres",fontsize=15)
plt.ylabel("No of Songs",fontsize=15)
#Slant the Genre Names so that neighbouring Labels do not overlap.
plt.xticks(fontsize=10,rotation=45,ha='right')
plt.yticks(fontsize=10)
plt.show()
5)(iv) To Plot a Bar Graph of Top 5 Genres
#Take the first 5 Entries of the Song Counts per Genre.
No_of_Songs=Genre.head(5)
Genre_Name=No_of_Songs.index
#Draw one Green Bar per Genre.
fig=plt.figure(figsize=(10,4))
plt.bar(Genre_Name,No_of_Songs,color='green',width=0.5)
#Title and Axis Labels.
plt.title("Top 5 Genres that are Hit",fontsize=15,color='black')
plt.xlabel("Genre Names",fontsize=15)
plt.ylabel("No of Songs",fontsize=15)
#Slant the Genre Names so that neighbouring Labels do not overlap.
plt.xticks(fontsize=10,rotation=45,ha='right')
plt.yticks(fontsize=10)
plt.show()
6)(i) To get the List of all the Hit Songs released in each Year.
#To count the Songs of each Year, ordered from the most to the fewest Songs (not chronologically).
Song_Year=Data['year'].value_counts()
Song_Year
year 2012 115 2017 111 2001 108 2018 107 2010 107 2014 104 2005 104 2011 99 2016 99 2015 99 2003 97 2008 97 2004 96 2006 95 2007 94 2002 90 2019 89 2013 89 2009 84 2000 74 1999 38 2020 3 1998 1 Name: count, dtype: int64
6)(ii) To Plot the Bar Graph.
#Count the Songs of each Year and turn the Result into a Table with the
#Columns 'year' and 'songs'.
Song_Year=Data['year'].value_counts().rename_axis('year').reset_index(name='songs')
plt.figure(figsize=(15,6))
plt.title("No of songs realesed in each year",fontsize=30)
#One Bar per Year; seaborn places the Years in Ascending Order on the x-Axis.
Graph=sns.barplot(x="year",y="songs",data=Song_Year,palette="flare")
#Write the Song Count on top of each Bar.
Graph.bar_label(Graph.containers[0],fontsize=10)
Graph.set_xlabel('Year',fontsize=20)
Graph.set_ylabel('No of Songs',fontsize=20)
#Style the Ticks after the Bars are drawn so that the Settings act on the
#final Tick Labels of the Plot.
plt.xticks(rotation=45,ha='right',fontsize=13)
plt.yticks(fontsize=15)
#plt.show() only accepts the Keyword Argument 'block'; the Axes Object must
#not be passed to it.
plt.show()
7)(i) To get a List of Top 25 Popular Songs.
#Sort all Songs by Popularity (highest first) and show Popularity, Song and Artist of the first 25.
#NOTE(review): the same Song can appear on more than one Row of the Dataset (see the Output) - confirm whether Duplicates should be dropped first.
Data.sort_values('popularity',axis=0,ascending=False).head(25)[['popularity','song','artist']]
| popularity | song | artist | |
|---|---|---|---|
| 1322 | 89 | Sweater Weather | The Neighbourhood |
| 1311 | 88 | Another Love | Tom Odell |
| 201 | 87 | Without Me | Eminem |
| 1613 | 86 | Wait a Minute! | WILLOW |
| 6 | 86 | The Real Slim Shady | Eminem |
| 1929 | 86 | lovely (with Khalid) | Billie Eilish |
| 1819 | 86 | lovely (with Khalid) | Billie Eilish |
| 1512 | 85 | Daddy Issues | The Neighbourhood |
| 1709 | 85 | Perfect | Ed Sheeran |
| 1999 | 85 | Circles | Post Malone |
| 1304 | 85 | Locked out of Heaven | Bruno Mars |
| 1227 | 85 | Locked out of Heaven | Bruno Mars |
| 209 | 85 | 'Till I Collapse | Eminem |
| 1514 | 85 | The Nights | Avicii |
| 1601 | 84 | One Dance | Drake |
| 1712 | 84 | Bored | Billie Eilish |
| 1700 | 84 | Shape of You | Ed Sheeran |
| 1910 | 84 | Someone You Loved | Lewis Capaldi |
| 1701 | 84 | No Lie | Sean Paul |
| 1408 | 84 | All of Me | John Legend |
| 1511 | 84 | The Hills | The Weeknd |
| 1805 | 84 | Lucid Dreams | Juice WRLD |
| 1344 | 84 | Do I Wanna Know? | Arctic Monkeys |
| 1326 | 84 | Why'd You Only Call Me When You're High? | Arctic Monkeys |
| 1905 | 84 | Lost in the Fire (feat. The Weeknd) | Gesaffelstein |
7)(ii) To Plot the Line Graph.
#Line Chart of the 25 Rows with the highest Popularity; the Artist is shown
#when hovering over a Marker.
Figure=px.line(
    Data.sort_values(by='popularity',ascending=False).head(25),
    x='song',
    y='popularity',
    markers=True,
    hover_data=['artist'],
    color_discrete_sequence=['green'],
    labels={'popularity':'Popularity','song':'Songs'},
    title='<b> Top 25 Popular Songs in Spotify',
)
#Centre the Title above the Plot.
Figure.update_layout(title_x=0.5)
Figure.show()
7)(iii) To get the List of Top 10 Popular Songs.
#Sort all Songs by Popularity (highest first) and show Popularity, Song and Artist of the first 10.
Data.sort_values('popularity',axis=0,ascending=False).head(10)[['popularity','song','artist']]
| popularity | song | artist | |
|---|---|---|---|
| 1322 | 89 | Sweater Weather | The Neighbourhood |
| 1311 | 88 | Another Love | Tom Odell |
| 201 | 87 | Without Me | Eminem |
| 1613 | 86 | Wait a Minute! | WILLOW |
| 6 | 86 | The Real Slim Shady | Eminem |
| 1929 | 86 | lovely (with Khalid) | Billie Eilish |
| 1819 | 86 | lovely (with Khalid) | Billie Eilish |
| 1512 | 85 | Daddy Issues | The Neighbourhood |
| 1709 | 85 | Perfect | Ed Sheeran |
| 1999 | 85 | Circles | Post Malone |
7)(iv) To get the List of Top 5 Popular Songs.
#Sort all Songs by Popularity (highest first) and show Popularity, Song and Artist of the first 5.
Data.sort_values('popularity',axis=0,ascending=False).head(5)[['popularity','song','artist']]
| popularity | song | artist | |
|---|---|---|---|
| 1322 | 89 | Sweater Weather | The Neighbourhood |
| 1311 | 88 | Another Love | Tom Odell |
| 201 | 87 | Without Me | Eminem |
| 1613 | 86 | Wait a Minute! | WILLOW |
| 6 | 86 | The Real Slim Shady | Eminem |
8)(i) To Convert Duration from ms into min:sec.
#To Define a Function to Convert Duration from ms into min:sec.
def ms_to_minsec(ms):
    """Convert a duration in milliseconds to a ``"m:ss"`` string.

    Fractions of a second are dropped, not rounded. The seconds are always
    written with two digits, so 484146 ms becomes "8:04" (not "8:4").

    ms: non-negative duration in milliseconds (int or float).
    Returns the formatted duration as a string.
    """
    minutes,seconds=divmod(int(ms//1000),60)
    return f"{minutes}:{seconds:02d}"
#Average Duration (in ms) of the Songs of each Year.
Duration=Data[['duration_ms','year']].groupby('year').mean().reset_index()
#To keep only the Years 2000-2019 (skips 1998, 1999 and 2020).
#The Years are selected by Value instead of by Row Position, so the Filter
#stays correct if a Year is missing from or added to the Dataset; copy()
#makes the Result an independent Table before new Columns are added.
Duration=Duration[Duration['year'].between(2000,2019)].copy()
#To Convert ms into Seconds.
Duration['duration_s']=Duration['duration_ms']/1000
#To Apply ms_to_minsec(ms) Function.
Duration['min:sec']=Duration['duration_ms'].apply(ms_to_minsec)
8)(ii) To Plot the Line Graph of Duration of Songs.
#Line Chart of the Average Song Duration (in Seconds) per Year; every Point
#is labelled with the Duration formatted as min:sec.
iplot(
    px.line(
        Duration,
        x='year',
        y='duration_s',
        text='min:sec',
        template='plotly_dark',
        color_discrete_sequence=['red'],
        labels={'year':'Year','duration_s':'Duration'},
        title='<b>Average Song duration over the Years</b>',
    )
    #Treat the Years as Categories so that every Year gets its own Tick.
    .update_xaxes(type='category')
    .update_traces(textposition='top right')
)
We observe that the Average Duration of Hit Songs has generally decreased over the Years. This suggests that Listeners increasingly prefer shorter Songs.
9)(i) To Find the Longest Hit Song.
#Keep only the Columns needed to describe a Song and its Length.
Longest_Song=Data.loc[:,['artist','song','duration_ms','year']]
#Add the Duration formatted as min:sec.
Longest_Song['min:sec']=Longest_Song['duration_ms'].apply(ms_to_minsec)
#Show every Row whose Duration equals the Maximum Duration.
Longest_Song.loc[Longest_Song['duration_ms'].eq(Longest_Song['duration_ms'].max())]
| artist | song | duration_ms | year | min:sec | |
|---|---|---|---|---|---|
| 1381 | Justin Timberlake | Mirrors | 484146 | 2013 | 8:4 |
9)(ii) To Find the Shortest Hit Song.
#Keep only the Columns needed to describe a Song and its Length.
Shortest_Song=Data.loc[:,['artist','song','duration_ms','year']]
#Add the Duration formatted as min:sec.
Shortest_Song['min:sec']=Shortest_Song['duration_ms'].apply(ms_to_minsec)
#Show every Row whose Duration equals the Minimum Duration.
Shortest_Song.loc[Shortest_Song['duration_ms'].eq(Shortest_Song['duration_ms'].min())]
| artist | song | duration_ms | year | min:sec | |
|---|---|---|---|---|---|
| 1931 | Lil Nas X | Old Town Road | 113000 | 2019 | 1:53 |
10)(i) To Get the Correlation Matrix.
#Keep only the Numeric Columns; Text and Boolean Columns are not selected.
Numeric_data=Data.select_dtypes(include='number')
#Pairwise Pearson Correlation between all the Numeric Columns.
Correlation_matrix=Numeric_data.corr(method='pearson')
Correlation_matrix
| duration_ms | year | popularity | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| duration_ms | 1.000000 | -0.316534 | 0.050617 | -0.060057 | -0.078763 | -0.002560 | -0.079912 | -0.003848 | 0.066998 | 0.010923 | -0.004208 | 0.024941 | -0.116870 | -0.028603 |
| year | -0.316534 | 1.000000 | -0.003825 | 0.033532 | -0.108644 | 0.007380 | 0.017479 | -0.007358 | 0.001111 | 0.033809 | -0.050265 | -0.027037 | -0.209365 | 0.076867 |
| popularity | 0.050617 | -0.003825 | 1.000000 | -0.003546 | -0.014021 | 0.014823 | 0.030632 | -0.021353 | 0.021162 | 0.024619 | -0.048059 | -0.009856 | -0.016142 | 0.014288 |
| danceability | -0.060057 | 0.033532 | -0.003546 | 1.000000 | -0.104038 | 0.032731 | -0.033315 | -0.067528 | 0.145590 | -0.065429 | 0.023207 | -0.126413 | 0.403178 | -0.173418 |
| energy | -0.078763 | -0.108644 | -0.014021 | -0.104038 | 1.000000 | -0.003446 | 0.651016 | -0.040651 | -0.057018 | -0.445469 | 0.037861 | 0.156761 | 0.334474 | 0.153719 |
| key | -0.002560 | 0.007380 | 0.014823 | 0.032731 | -0.003446 | 1.000000 | -0.007474 | -0.153182 | 0.007147 | 0.002365 | -0.008173 | -0.033071 | 0.036977 | -0.001431 |
| loudness | -0.079912 | 0.017479 | 0.030632 | -0.033315 | 0.651016 | -0.007474 | 1.000000 | -0.028133 | -0.076388 | -0.310039 | -0.104925 | 0.102159 | 0.232150 | 0.080709 |
| mode | -0.003848 | -0.007358 | -0.021353 | -0.067528 | -0.040651 | -0.153182 | -0.028133 | 1.000000 | -0.000077 | 0.005744 | -0.038613 | 0.025439 | -0.074681 | 0.048434 |
| speechiness | 0.066998 | 0.001111 | 0.021162 | 0.145590 | -0.057018 | 0.007147 | -0.076388 | -0.000077 | 1.000000 | 0.000394 | -0.062954 | 0.061172 | 0.073605 | 0.057747 |
| acousticness | 0.010923 | 0.033809 | 0.024619 | -0.065429 | -0.445469 | 0.002365 | -0.310039 | 0.005744 | 0.000394 | 1.000000 | -0.005214 | -0.110043 | -0.128128 | -0.103660 |
| instrumentalness | -0.004208 | -0.050265 | -0.048059 | 0.023207 | 0.037861 | -0.008173 | -0.104925 | -0.038613 | -0.062954 | -0.005214 | 1.000000 | -0.034897 | -0.015192 | 0.034608 |
| liveness | 0.024941 | -0.027037 | -0.009856 | -0.126413 | 0.156761 | -0.033071 | 0.102159 | 0.025439 | 0.061172 | -0.110043 | -0.034897 | 1.000000 | 0.019040 | 0.028636 |
| valence | -0.116870 | -0.209365 | -0.016142 | 0.403178 | 0.334474 | 0.036977 | 0.232150 | -0.074681 | 0.073605 | -0.128128 | -0.015192 | 0.019040 | 1.000000 | -0.025076 |
| tempo | -0.028603 | 0.076867 | 0.014288 | -0.173418 | 0.153719 | -0.001431 | 0.080709 | 0.048434 | 0.057747 | -0.103660 | 0.034608 | 0.028636 | -0.025076 | 1.000000 |
10)(ii) To Plot the Heatmap using Seaborn.
#Heatmap of the Correlation Matrix; every Cell is annotated with its Value
#and coloured on the 'coolwarm' Colour Map.
plt.figure(figsize=(12,9))
sns.heatmap(
    Correlation_matrix,
    cmap='coolwarm',
    annot=True,
)
plt.title('Correlation Heatmap')
plt.show()
- To Plot a Pie Chart to know How many Songs have Explicit Content.
#Donut Chart of the Number of Songs with and without Explicit Content.
#The Rows are counted per Value of 'explicit'; the 'song' Column then holds
#the Number of non-null Songs of each Group.
Figure=px.pie(
    Data.groupby('explicit',as_index=False).count().sort_values(by='song',ascending=False),
    names='explicit',
    values='song',
    hole=0.7,
    template='plotly_dark',
    color_discrete_sequence=['green','crimson'],
    labels={'song':'Total songs'},
    title='Songs that have Explicit Content',
)
#Centre the Title above the Plot.
Figure.update_layout(title_x=0.5)
72.4% of the Songs don't have any Explicit Content. So, most of the Songs are Children Friendly. The less Explicit Content a Song has, the higher its chance of becoming a Top Hit.
- To Plot the Scatter Graph of Top 10 Explicit Popular Songs.
#The 10 most Popular Songs among those flagged as Explicit.
Top_Explicit_Songs=Data.loc[Data['explicit']].nlargest(10,'popularity')
plt.figure(figsize=(10,6))
plt.scatter(
    Top_Explicit_Songs['song'],
    Top_Explicit_Songs['popularity'],
    s=Top_Explicit_Songs['popularity']*0.2,#s=marker size
    c=Top_Explicit_Songs['popularity'],#c=values mapped to colours
    cmap='tab10',
)
#Title and Axis Labels.
plt.title('Top 10 Explicit Popular Songs',fontsize=20)
plt.xlabel('Song',fontsize=15)
plt.ylabel('Popularity',fontsize=15)
#Slant the Song Names so that neighbouring Labels do not overlap.
plt.xticks(rotation=45,ha='right')
#Legend Bar that maps the Marker Colours to Popularity Values.
plt.colorbar(label='Popularity')
plt.tight_layout()
plt.show()
13)(i) To get the Year with Most Explicit Songs.
#Number of Explicit Songs per Year (summing the Boolean Flag counts the True Values).
Explicit_Counts_in_Year=Data['explicit'].groupby(Data['year']).sum()
#Year that holds the largest Count, and that Count itself.
Year_with_Most_Explicit=Explicit_Counts_in_Year.idxmax()
Most_Explicit_Count=Explicit_Counts_in_Year.loc[Year_with_Most_Explicit]
Year_with_Most_Explicit
2018
13)(ii) To Plot the Bar Graph.
plt.figure(figsize=(10,6))
#Green Bars for all the Years ...
plt.bar(
    Explicit_Counts_in_Year.index,
    Explicit_Counts_in_Year.values,
    color='green',
)
#... and an Orange Bar drawn over the Year with the most Explicit Songs to highlight it.
plt.bar(Year_with_Most_Explicit,Most_Explicit_Count,color='orange')
#Title and Axis Labels.
plt.title('Year with Most Explicit Songs',fontsize=25)
plt.xlabel('Year',fontsize=15)
plt.ylabel('Number of Explicit Songs',fontsize=15)
plt.tight_layout()
plt.show()
- To Plot the Histogram Subplots.
#Features to plot, listed Row by Row for the 3x3 Grid of Subplots.
Histogram_Features=('popularity','danceability','energy',
                    'loudness','speechiness','acousticness',
                    'liveness','valence','tempo')
#Every Subplot is titled with the capitalised Feature Name.
Figure=make_subplots(rows=3,cols=3,
                     subplot_titles=tuple(Feature.capitalize() for Feature in Histogram_Features))
#Add one Histogram per Feature; divmod turns the running Position into the
#(Row, Column) Cell of the Grid, which plotly counts from 1.
for Position,Feature in enumerate(Histogram_Features):
    Grid_Row,Grid_Col=divmod(Position,3)
    Figure.add_trace(go.Histogram(x=Data[Feature],name=Feature),row=Grid_Row+1,col=Grid_Col+1)
#Size, Title and Theme of the whole Figure.
Figure.update_layout(height=900,width=900,title_text='<b>Feature Distribution',
                     template='plotly_dark',title_x=0.5)
#Numeric Audio Features of the Dataset.
New_Data=Data[['duration_ms','year','popularity','danceability','energy','loudness','speechiness',
               'acousticness','instrumentalness','liveness','valence','tempo']]
#Features whose Mean over all the Songs is reported.
Mean_Columns=['speechiness','acousticness','instrumentalness','liveness','valence','tempo']
#One Row per Feature: the capitalised Feature Name and its Mean.
Data_1={'Heading':[Column.capitalize() for Column in Mean_Columns],
        'Data':[New_Data[Column].mean() for Column in Mean_Columns]}
A=pd.DataFrame(Data_1)
A
| Heading | Data | |
|---|---|---|
| 0 | Speechiness | 0.103568 |
| 1 | Acousticness | 0.128955 |
| 2 | Instrumentalness | 0.015226 |
| 3 | Liveness | 0.181216 |
| 4 | Valence | 0.551690 |
| 5 | Tempo | 120.122558 |
a) From the above Subplots, we can suggest that : -
- The Danceability and Energy on Average for the Top Hits are generally HIGH, indicating that these Songs are high on Intensity.
- The Loudness of the Top Hits are also HIGH compared to the rest of the Songs.
b) From the above Analysis, we can suggest that : -
- The Speechiness of the Top Hits are quite LOW.
- The Acousticness of the Tracks are also LOW, suggesting that Songs which tend to become Top Hits are HIGH in Energy and Tempo.
- The Instrumentalness of the Songs are also LOW, which gives a pretty clear idea as to how Music taste has evolved amongst the Listeners.
- The late 20th century was dominated by Bands and, as the Years progressed, we witnessed a decline in the number of popular Bands; Instrumentalness therefore also declined over the years, with a few exceptions.
- The Songs which became Top Hits, score LOW on Liveness indicating that the Songs are mostly Recorded.
- Top Hits on Average, score just above 0.5 on Valence, indicating that while Most of the Top Hits have a Positive mood associated with them, some have quite an equal mixture of both Positive and Sad sounding.
- The Tempo for Most of the Top Hits on Average is 120bpm, suggesting that the Songs are Peppy and Fast sounding.
Questions to be Answered.
- To get the Average Popularity of the Songs released in each Year.
#Split the Songs into one Group per Release Year.
Grouped_by_Year=Data.groupby('year')
#Average the Popularity inside every Year Group.
Popularity_by_Year=Grouped_by_Year['popularity'].agg('mean')
#Show the Average Popularity of each Year.
print(Popularity_by_Year)
year 1998 49.000000 1999 64.921053 2000 58.554054 2001 59.407407 2002 62.400000 2003 58.793814 2004 58.427083 2005 61.759615 2006 57.389474 2007 59.925532 2008 63.721649 2009 61.535714 2010 60.747664 2011 58.565657 2012 60.556522 2013 54.786517 2014 59.384615 2015 57.747475 2016 54.171717 2017 53.882883 2018 70.850467 2019 63.011236 2020 43.333333 Name: popularity, dtype: float64
- To find the Most Popular Genre for Songs with the Duration Longer than 5 Minutes.
#Keep only the Songs that last longer than 300000 ms (5 Minutes).
Filtered_Data=Data.loc[Data['duration_ms'].gt(300000)]
#Split the remaining Songs into one Group per Genre Label.
Grouped_by_Genre=Filtered_Data.groupby('genre')
#Average the Popularity inside every Genre Group.
Popularity_by_Genre=Grouped_by_Genre['popularity'].agg('mean')
#Genre Label whose Average Popularity is the highest.
Most_Popular_Genre=Popularity_by_Genre.idxmax()
print("The Most Popular Genre for Songs with Duration more than 5 minutes is:",Most_Popular_Genre)
The Most Popular Genre for Songs with Duration more than 5 minutes is: hip hop, Dance/Electronic
- To find the Artist with Highest Average Danceability and their Most Popular Song.
#Split the Songs into one Group per Artist.
Grouped_by_Artist=Data.groupby('artist')
#Average the Danceability inside every Artist Group.
Danceability_by_Artist=Grouped_by_Artist['danceability'].agg('mean')
#Artist whose Average Danceability is the highest.
Artist_with_Highest_Danceability=Danceability_by_Artist.idxmax()
#All the Songs of that Artist.
Filtered_by_Artist=Data.loc[Data['artist'].eq(Artist_with_Highest_Danceability)]
#Row Label of the Artist's most Popular Song, then the Title stored on that Row.
Most_Popular_Row=Filtered_by_Artist['popularity'].idxmax()
Most_Popular_Song=Filtered_by_Artist.loc[Most_Popular_Row,'song']
print("The Artist with the Highest Average Danceability is:",Artist_with_Highest_Danceability)
print("Their Most Popular Song is:",Most_Popular_Song)
The Artist with the Highest Average Danceability is: Lil Baby Their Most Popular Song is: Yes Indeed
Conclusions.
- RIHANNA is the Top Artist with 25 Hit Songs.
- POP seems to be the Most Popular Genre, with 428 of the Spotify Top Hit Songs from 2000 to 2019.
- In the Year 2012, we got the Most number of Top Hits with 115 Songs.
- SWEATER WEATHER by The Neighbourhood is the Most Popular Song.
- Average Duration of the Songs kept on decreasing year by year.
- The less Explicit Content a Song has, the higher its Chances of becoming a Top Hit.
- Most of the Hit Songs scored LOW on Speechiness, Liveness, Instrumentalness and Acousticness, which means they are Soothing and feel Relaxed.